Microglia samples | PD and CT | Limma with Duplicate Correlation.
## Warning: package 'S4Vectors' was built under R version 3.6.3
## Warning: package 'DelayedArray' was built under R version 3.6.3
# gene_lists = "~/pd-omics/katia/Microglia/GeneLists_4analysis/"
work_dir = "~/pd-omics/katia/MyND/mynd_rev1/microglia_PDxCT/"
expression_dir = "~/pd-omics/katia/MyND/mynd_rev1/microglia_PDxCT/"
metadata_path = "~/pd-omics/katia/MyND/mynd_rev1/microglia_PDxCT/join_metadata/"## Input data
load(paste0(metadata_path, "metadata_PD_CT_mic.Rdata"))
# Transforms characters columns in factors columns
indx <- sapply(join_metadata, is.character)
join_metadata[indx] <- lapply(join_metadata[indx], function(x) as.factor(x))
join_metadata$ph = as.numeric(as.character(join_metadata$ph))
# str(join_metadata) #shows class for all columns
rownames(join_metadata) = join_metadata$donor_tissue
load(paste0(expression_dir, "expression_data/mic_pd_ct_filt.Rdata"))
# dim(counts1) #17489 154
samples_case = as.character(join_metadata$donor_tissue[join_metadata$main_diagnosis %in% "Parkinson"])
samples_control = as.character(join_metadata$donor_tissue[join_metadata$main_diagnosis %in% "Control"])
# We don't know the tissues of these 2 samples, so I'll remove it for the DE analysis. I canot fit a model for it.
# MG-03-RNA
# MG-02-RNA
counts1 = counts1[, !colnames(counts1) %in% "MG-03-RNA"]
join_metadata = join_metadata[!rownames(join_metadata) %in% "MG-03-RNA", ]
samples_control = samples_control[!samples_control %in% "MG-03-RNA"]
counts1 = counts1[, !colnames(counts1) %in% "MG-02-RNA"]
join_metadata = join_metadata[!rownames(join_metadata) %in% "MG-02-RNA", ]
samples_control = samples_control[!samples_control %in% "MG-02-RNA"]
# table(join_metadata$main_diagnosis)[1] “13-066-CC” “14-074-CC” “14-078-CC”
[4] “14-078-GFM” “15-024-GFM” “15-024-THA”
[7] “15-041-THA” “15-097-GTS” “15-110-GTS”
[10] “16-016-GTS” “16-016-THA” “16-102-GFM”
[13] “17-012-GFM” “17-012-SVZ” “17-012-THA”
[16] “MG-06-GFM” “MG-06-SVZ” “MG-08-GFM”
[19] “MG-08-SVZ” “MG-13-GFM” “MG-13-SN”
[22] “MG-13-SVZ” “MG-12-MGF-RNA” “MG-12-SVZ-RNA”
[25] “MG-14-CER-RNA” “MG-14-MGF-RNA” “MG-15-CER-RNA”
[28] “MG-15-MGF-RNA” “MG-22-HIP-RNA” “MG-22-MGF-RNA”
[31] “MG-22-SN-RNA” “MG-25-HIP-RNA” “MG-25-MGF-RNA”
[34] “MG-26-DLPFC-RNA” “MG-26-OCC-CORTEX-RNA” “MG-26-SN-RNA”
[1] “13-095-CC” “14-005-CC” “14-005-GFM”
[4] “14-005-GTS” “14-015-CC” “14-015-GFM”
[7] “14-015-GTS” “14-029-CC” “14-029-GFM”
[10] “14-029-GTS” “14-051-CC” “14-051-GTS”
[13] “14-053-CC” “14-055-CC” “14-063-CC”
[16] “14-069-CC” “14-069-GFM” “14-069-GTS”
[19] “14-075-CC” “14-075-GFM” “14-075-GTS”
[22] “15-018-GFM” “15-018-GTS” “15-027-GFM”
[25] “15-034-GFM” “15-055-GFM” “15-055-GTS”
[28] “15-074-THA” “15-087-GFM” “15-087-GTS”
[31] “15-087-THA” “15-089-GFM” “15-089-GTS”
[34] “15-089-THA” “16-027-GFM” “16-027-GTS”
[37] “16-038-GFM” “16-046-GFM” “16-046-GTS”
[40] “16-046-THA” “16-056-GFM” “16-056-THA”
[43] “16-067-GFM” “16-067-GTS” “16-067-THA”
[46] “16-078-GFM” “16-078-GTS” “16-078-THA”
[49] “16-080-GFM” “16-080-GTS” “16-080-SVZ”
[52] “16-080-THA” “16-082-SVZ” “16-116-GFM”
[55] “16-116-GTS” “16-137-GFM” “16-137-GTS”
[58] “16-137-SVZ” “16-137-THA” “17-003-GFM”
[61] “17-003-GTS” “17-003-SVZ” “17-003-THA”
[64] “17-005-CER” “17-005-GFM” “17-005-GTS”
[67] “17-005-SVZ” “17-005-THA” “17-016-CER”
[70] “17-016-GFM” “17-016-GTS” “17-016-SVZ”
[73] “17-016-THA” “17-078-GFM” “17-078-GTS”
[76] “17-078-SVZ” “17-078-THA” “17-097-GFM”
[79] “17-097-GTS” “17-097-SVZ” “17-124-GFM”
[82] “17-124-GTS” “17-124-SVZ” “17-124-THA”
[85] “18-018-GFM” “18-018-SVZ” “18-018-THA”
[88] “18-021-GFM” “18-021-GTS” “18-021-SVZ”
[91] “18-021-THA” “18-064-GFM” “18-064-GTS”
[94] “18-064-SVZ” “18-064-THA” “18-105-GFM”
[97] “18-105-GTS” “18-105-THA” “18-112-GFM”
[100] “18-112-GTS” “18-112-THA” “MG-01-SVZ”
[103] “MG-03-SVZ” “MG-04-GFM” “MG-09-Cerebellum” [106] “MG-09-GFM” “MG-03-MGF-RNA” “MG-04-SVZ-RNA”
[109] “MG-09-SVZ-RNA” “MG-16-MGF-RNA” “MG-19-HIP-RNA”
[112] “MG-19-MGF-RNA” “MG-19-SN-RNA” “MG-20-GFM-RNA”
[115] “MG-20-MGF-RNA” “MG-20-SN-RNA”
metadata4deg <- join_metadata
genes_counts4deg <- counts1
metadata4deg$cause_of_death_categories[metadata4deg$cause_of_death_categories %in% NA] <- "Other"
design.dupcor1 <- model.matrix(~ main_diagnosis + pmd_minutes + batch + cause_of_death_categories + sex + age + tissue + picard_pct_mrna_bases + picard_pct_pf_reads_aligned + picard_pct_ribosomal_bases, data = metadata4deg)
genes_counts4deg <- DGEList(genes_counts4deg)
genes_counts4deg <- calcNormFactors(genes_counts4deg)
genes_voom4deg <- voom(genes_counts4deg, design.dupcor1)
# Estimate linear mixed model
# Fit the model for each gene
dupcor <- duplicateCorrelation(genes_voom4deg, design.dupcor1, block = metadata4deg$donor_id)
# run voom considering the duplicateCorrelation results
vobj <- voom(genes_counts4deg, design.dupcor1, block=metadata4deg$donor_id, correlation=dupcor$consensus)
# Estimate linear mixed model with a single variance component
# Fit the model for each gene
dupcor2 <- duplicateCorrelation(vobj, design.dupcor1, block = metadata4deg$donor_id)
fitDupcor <- lmFit(vobj, design.dupcor1, block = metadata4deg$donor_id, correlation = dupcor2$consensus)
fitDupcor <- eBayes(fitDupcor)
# topTable(fitDupcor, coef = "main_diagnosisParkinson") # Default is 10 lines
resPD <- data.frame(topTable(fitDupcor, coef = "main_diagnosisParkinson",
number = nrow(genes_counts4deg), sort.by = "p"), check.names = F)
# design.dupcor1res = resPD
p = ggplot(res, aes(P.Value))
p + geom_density(color="darkblue", fill="lightblue") +
theme_bw() +
ggtitle("FDR Distribution")p = ggplot(res, aes(logFC))
p + geom_density(color = "darkblue", fill = "lightblue") +
theme_bw() +
ggtitle("Fold Change Distribution")plot.data = res
plot.data$id = rownames(plot.data)
data = data.frame(plot.data)
data$P.Value = -log10(data$P.Value)
data$fifteen = as.factor(abs(data$adj.P.Val < 0.15))
ma = ggplot(data, aes(AveExpr, logFC, color = fifteen))
ma + geom_point() +
scale_color_manual(values = c("black", "red"), labels = c ("> 0.15", "< 0.15")) +
labs(title = "MA plot", color = "labels") +
theme(plot.title = element_text(hjust = 0.5)) + ylim (-10,10) + xlim(-4,22)vp = ggplot(data, aes(logFC, P.Value, color = fifteen))
vp + geom_point() +
scale_color_manual(values = c("black", "red"), labels = c("> 0.15", "< 0.15")) +
labs(title = "Gene Level Volcano Plot", color = "FDR") +
theme(plot.title = element_text(hjust = 0.5)) +
xlim(-10,10) + ylim(0, 10) + ylab("-log10 pvalue")## Get conversion table for Gencode 30
gencode_30 = read.table("~/pd-omics/katia/ens.geneid.gencode.v30")
#gencode_30 = read.table("/Users/katia/OneDrive/Documentos/MountSinai/Projects/ens.geneid.gencode.v30", header = T)
colnames(gencode_30) = c("ensembl","symbol")
res$ensembl = rownames(res)
res_name = merge(res, gencode_30, by="ensembl")
rownames(res_name) = res_name$ensembl
res_name = res_name[order(res_name$adj.P.Val), ]
res_name = res_name[, c("symbol", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")]
deg_lists = res_name[which(res_name$adj.P.Val<0.15),]
createDT(res_name)LogFC > 0 is up in Case.
top_6 = head(res_name)
top_6$ensembl = rownames(top_6)
rownames(metadata4deg) = metadata4deg$donor_tissue
genes_voom = genes_voom4deg
gene2check = as.data.frame( genes_voom[rownames(top_6) ,])
gene2check$ensembl = rownames(gene2check)
gene2check = merge(gene2check, top_6[, c("symbol", "ensembl")], by = "ensembl")
gene2check_m = melt(gene2check, id.vars = c("ensembl", "symbol"))
gene2check_charac = merge(gene2check_m, metadata4deg, by.x = "variable", by.y = "donor_tissue")
ggplot(gene2check_charac, aes(x = main_diagnosis, y = value, fill = main_diagnosis)) +
geom_boxplot(notch = F, na.rm = T, outlier.color = NA) +
geom_jitter() +
theme_classic() +
facet_wrap(~symbol, scales = "free_y") +
ggpubr::stat_compare_means(label = "p.format", label.x.npc = "centre", method = "wilcox.test")R version 3.6.2 (2019-12-12) Platform: x86_64-apple-darwin15.6.0 (64-bit) Running under: macOS 10.16
Matrix products: default BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
locale: [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages: [1] stats4 parallel stats graphics grDevices utils datasets [8] methods base
other attached packages: [1] reshape2_1.4.4 forcats_0.4.0
[3] stringr_1.4.0 dplyr_1.0.2
[5] purrr_0.3.4 readr_1.3.1
[7] tidyr_1.1.2 tibble_3.0.4
[9] tidyverse_1.3.0 broom_0.5.6
[11] edgeR_3.28.0 DESeq2_1.26.0
[13] SummarizedExperiment_1.16.1 DelayedArray_0.12.3
[15] BiocParallel_1.20.1 matrixStats_0.57.0
[17] GenomicRanges_1.38.0 GenomeInfoDb_1.22.0
[19] IRanges_2.20.1 S4Vectors_0.24.4
[21] pamr_1.56.1 survival_3.1-8
[23] cluster_2.1.0 factoextra_1.0.6
[25] gplots_3.1.0 ggfortify_0.4.10
[27] variancePartition_1.17.7 Biobase_2.46.0
[29] BiocGenerics_0.32.0 scales_1.1.1
[31] foreach_1.4.8 limma_3.42.0
[33] ggplot2_3.3.2
loaded via a namespace (and not attached): [1] readxl_1.3.1 backports_1.1.8 Hmisc_4.3-0
[4] plyr_1.8.6 splines_3.6.2 crosstalk_1.1.0.1
[7] digest_0.6.27 htmltools_0.5.0 fansi_0.4.1
[10] magrittr_2.0.1 checkmate_1.9.4 memoise_1.1.0
[13] doParallel_1.0.15 annotate_1.64.0 modelr_0.1.5
[16] prettyunits_1.1.1 jpeg_0.1-8.1 colorspace_2.0-0
[19] blob_1.2.0 rvest_0.3.5 ggrepel_0.8.2
[22] haven_2.2.0 xfun_0.11 crayon_1.3.4
[25] RCurl_1.95-4.12 jsonlite_1.7.1 genefilter_1.68.0
[28] lme4_1.1-21 iterators_1.0.12 glue_1.4.2
[31] gtable_0.3.0 zlibbioc_1.32.0 XVector_0.26.0
[34] DBI_1.1.0 Rcpp_1.0.5 xtable_1.8-4
[37] progress_1.2.2 htmlTable_1.13.3 foreign_0.8-72
[40] bit_1.1-14 Formula_1.2-3 DT_0.13
[43] htmlwidgets_1.5.2 httr_1.4.2 RColorBrewer_1.1-2
[46] acepack_1.4.1 ellipsis_0.3.1 farver_2.0.3
[49] pkgconfig_2.0.3 XML_3.98-1.20 nnet_7.3-12
[52] dbplyr_1.4.2 locfit_1.5-9.1 tidyselect_1.1.0
[55] labeling_0.4.2 rlang_0.4.8 AnnotationDbi_1.48.0
[58] munsell_0.5.0 cellranger_1.1.0 tools_3.6.2
[61] cli_2.2.0 generics_0.1.0 RSQLite_2.1.5
[64] evaluate_0.14 yaml_2.2.1 knitr_1.26
[67] bit64_0.9-7 fs_1.3.1 caTools_1.18.0
[70] nlme_3.1-142 xml2_1.2.2 compiler_3.6.2
[73] pbkrtest_0.4-8.6 rstudioapi_0.13 png_0.1-7
[76] ggsignif_0.6.0 reprex_0.3.0 statmod_1.4.32
[79] geneplotter_1.64.0 stringi_1.5.3 lattice_0.20-38
[82] Matrix_1.2-18 nloptr_1.2.1 vctrs_0.3.5
[85] pillar_1.4.7 lifecycle_0.2.0 data.table_1.13.2
[88] bitops_1.0-6 colorRamps_2.3 R6_2.5.0
[91] latticeExtra_0.6-29 KernSmooth_2.23-16 gridExtra_2.3
[94] codetools_0.2-16 boot_1.3-23 MASS_7.3-51.6
[97] gtools_3.8.2 assertthat_0.2.1 withr_2.3.0
[100] GenomeInfoDbData_1.2.2 hms_0.5.3 grid_3.6.2
[103] rpart_4.1-15 minqa_1.2.4 rmarkdown_2.0
[106] ggpubr_0.2.4 lubridate_1.7.9 base64enc_0.1-3